# -*- coding: utf-8 -*-
'''
Created on April 8, 2017

@author: ZhuJiahui
'''

import os
import time
from file_utils.file_writer import quick_write_1d_to_text
from global_info.global_nlp import GlobalNLP

def embedding_transform(selected_word_list, read_filename, write_filename1, write_filename2):
    """Extract the embeddings of selected words from a text embedding file.

    ``read_filename`` is read as UTF-8, one line at a time. Each line is split
    on whitespace: the first token is treated as the word and the remaining
    tokens as its vector components. For every line whose word is in
    ``selected_word_list``, the components are joined with tabs and collected,
    and the word is collected in a parallel list (both in file order, so entry
    ``i`` of one list corresponds to entry ``i`` of the other).

    Both lists are then handed to ``quick_write_1d_to_text`` (presumably one
    entry per output line -- confirm against ``file_utils.file_writer``).

    Args:
        selected_word_list: Iterable of words to keep. Items must be hashable
            (e.g. ``str``) because they are placed in a set for lookup.
        read_filename: Path of the whitespace-separated embedding text file.
        write_filename1: Output path for the tab-joined vectors.
        write_filename2: Output path for the matching words.
    """
    # Build the lookup set once: membership tests run for every line of the
    # embedding file, and a list scan there would be O(len(list)) per line.
    selected_words = set(selected_word_list)

    embedding_to_write = []
    word_to_write = []

    with open(read_filename, 'r', encoding="utf-8") as f:
        for each_line in f:
            # str.split() with no argument already ignores leading/trailing
            # whitespace, so no separate strip() is needed.
            split_line = each_line.split()
            if not split_line:
                # Blank line: there is no word token to index, skip it
                # instead of raising IndexError.
                continue
            this_word = split_line[0]
            if this_word in selected_words:
                embedding_to_write.append("\t".join(split_line[1:]))
                word_to_write.append(this_word)

    quick_write_1d_to_text(write_filename1, embedding_to_write)
    quick_write_1d_to_text(write_filename2, word_to_write)
           

if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()

    # Data files are resolved relative to the parent of the current working
    # directory, so the script must be launched from a direct subdirectory
    # of the project root.
    now_directory = os.getcwd()
    root_directory = os.path.dirname(now_directory) + '/'
    read_filename1 = root_directory + u'dataset/sogou/zhwiki_word_embedding.txt'
    read_filename2 = root_directory + u'dataset/sogou/train_feature_words.txt'

    write_filename1 = root_directory + u'dataset/sogou/visual_word_embedding.tsv'
    write_filename2 = root_directory + u'dataset/sogou/visual_word_list.tsv'

    # The feature-word file is GBK-encoded; only the part of each line before
    # the first GlobalNLP.CN_WORD_INNER_DELIMITER is kept as the word.
    with open(read_filename2, 'r', encoding="gbk") as f:
        selected_word_list = [
            each_line.strip().split(GlobalNLP.CN_WORD_INNER_DELIMITER)[0]
            for each_line in f
        ]

    embedding_transform(selected_word_list, read_filename1, write_filename1, write_filename2)

    print('Total time %f seconds' % (time.perf_counter() - start))